We are going to use the K-means method to cluster groups with similar characteristics.
The data we use come from Statistics Sweden (SCB). The analysis is part of my master's thesis, "The Impact of Swedish Public Finance Factors on the Local Real Estate Market — Based on the GMM PVAR Approach".
import osmnx as ox, geopandas as gpd
%matplotlib inline
# Configure OSMnx through its settings module. The ox.config() helper is
# deprecated and no longer available in recent OSMnx releases, whereas these
# settings attributes can be assigned directly.
ox.settings.log_console = True
ox.settings.use_cache = True
# Path to the GADM 3.6 level-2 dataset for Sweden (shapefile component).
link = './gadm36_SWE_shp/gadm36_SWE_2.dbf'
# Read the boundaries into a GeoDataFrame; it is merged with the attribute
# table further down using the NAME_2 column.
shape = gpd.read_file(link)
type(shape)  # notebook cell output only; has no effect when run as a script
%matplotlib inline
import seaborn as sns
import pandas as pd
from pysal.lib import weights
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from statsmodels.api import OLS
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pandas.plotting import scatter_matrix
import seaborn as sns
# Load the attribute table; the first row of the CSV holds the column names.
ave = pd.read_csv("./22_cluster.csv", header=0)
ave.head()  # notebook preview only
# Rename the key column so it carries the same name as the shapefile's key.
ave = ave.rename(columns={'Municipality': 'NAME_2'})
# Left join: keep every row of `ave` and attach the matching columns of
# `shape` (rows without a match get missing values).
join = ave.merge(shape, how='left', on='NAME_2')
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mapclassify
from geopandas import GeoDataFrame
ave = GeoDataFrame(ave)
# Columns that are mapped (one choropleth panel per entry).
attribute = ["ave_tax","ave_edu_rate","ave_dipo_inc","ave_pop_den","ave_employ_rate","ave_inmi_rate"]
from geopandas import GeoDataFrame
# `join` comes out of pd.merge as a plain DataFrame, so a CRS assigned to it
# beforehand is dropped when the GeoDataFrame is constructed. Hand the
# shapefile's CRS to the constructor so the result is guaranteed to carry it.
join = GeoDataFrame(join, crs=shape.crs)
import numpy as np
import pandas as pd
import warnings
# Suppress every warning category from here on (applies process-wide).
warnings.filterwarnings('ignore')
# IPython magic (notebook only): render inline figures in 'retina' format.
%config InlineBackend.figure_format ='retina'
import seaborn as sns
import geopandas as gpd
import palettable as pltt
from seaborn import palplot
import osmnx as ox, geopandas as gpd
# Configure OSMnx through its settings module. The ox.config() helper is
# deprecated and no longer available in recent OSMnx releases, whereas these
# settings attributes can be assigned directly.
ox.settings.log_console = True
ox.settings.use_cache = True
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
# For adding basemap
import contextily as cx
from sklearn import preprocessing
from sklearn import cluster
def choro():
    """Draw one choropleth panel per entry of ``attribute`` and save the figure.

    Reads the module-level ``join`` GeoDataFrame and ``attribute`` list.
    The panels are laid out on a 2 x 3 grid; each one classifies its column
    into six Fisher-Jenks classes, shades it with the Blues colormap and puts
    a contextily basemap underneath. The figure is written to ``choro.png``
    and then displayed.
    """
    f, axs = plt.subplots(nrows=2, ncols=3, figsize=(30, 30))  # Set figure layout
    axs = axs.flatten()

    # A figure has a single suptitle, so set it once and name every mapped
    # column instead of overwriting it on each loop iteration.
    f.suptitle('Choropleth of \n' + ', '.join(attribute), fontsize='x-large')

    for i, column in enumerate(attribute):
        ax = axs[i]
        # Blue choropleth with thin white borders between polygons.
        join.plot(column=column, scheme='fisher_jenks', k=6,
                  cmap=plt.cm.Blues, alpha=0.8, ax=ax,
                  edgecolor='w', linewidth=0.4)
        ax.set_axis_off()
        ax.set_title(column)
        # Equal scaling on THIS panel; plt.axis() would act on pyplot's
        # current axes rather than on the panel being drawn.
        ax.axis('equal')
        cx.add_basemap(ax, crs=join.crs)  # Add basemap

    f.tight_layout()
    f.savefig('choro.png', bbox_inches='tight')
    plt.show()


choro()